## Sourobh Ghosh, Casey Kearney, Mahdi Hashemian
## Gov 2001 Replication Project

rm(list = ls())

require(readstata13)
require(sandwich)


setwd("/Users/mhashemi/Dropbox (MIT)/Gov 2001 Replication Paper/Data")  ## modify this as necessary

data = read.dta13("SovietCollaboration_CitationToSovietArtDataset.dta")
t5<-data

##### robust cluster SEs function (http://www.r-bloggers.com/easy-clustered-standard-errors-in-r/)

get_CL_vcov<-function(model, cluster){
  require(sandwich)
  require(lmtest)
  
  #calculate degree of freedom adjustment
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- model$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  
  #calculate the uj's
  uj  <- apply(estfun(model),2, function(x) tapply(x, cluster, sum))
  
  #use sandwich to get the var-covar matrix
  vcovCL <- dfc*sandwich(model, meat=crossprod(uj)/N)
  return(vcovCL)
}

##Count of Soviet References-Simple
m1 <- lm(sovietpubreferences ~ aftersoviet
           + factor(adjustedmsc) + factor(year), data = t5)
#Call Robust SE
m1.vcovCL<-get_CL_vcov(m1, t5$adjustedmsc)
#Creating the Results
coeftest(m1, m1.vcovCL)  ## corresponding robust cluster SE
summary(m1)$r.squared  ## R-squared


##Percentage of Soviet references-Simple
m2 <- lm(percentagesovietjournalref ~ aftersoviet
         + factor(adjustedmsc) + factor(year), data = t5)
#Call Robust SE
m2.vcovCL<-get_CL_vcov(m2, t5$adjustedmsc)
#Creating the Results
coeftest(m2, m2.vcovCL)  ## corresponding robust cluster SE
summary(m2)$r.squared  ## R-squared

##Count of Soviet references-Complete
m3 <- lm(sovietpubreferences ~ aftersoviet + logauthorcount:aftersoviet 
         + logauthorcount:sovietdummy + logauthorcount:afterdummy + logauthorcount
         + factor(adjustedmsc) + factor(year), data = t5)
#Call Robust SE
m3.vcovCL<-get_CL_vcov(m3, t5$adjustedmsc)
#Creating the Results
coeftest(m3, m3.vcovCL)  ## corresponding robust cluster SE
summary(m3)$r.squared  ## R-squared


##Percentage of Soviet references-Complete
m4 <- lm(percentagesovietjournalref ~ aftersoviet + logauthorcount:aftersoviet 
          + logauthorcount:sovietdummy + logauthorcount:afterdummy + logauthorcount
             + factor(adjustedmsc) + factor(year), data = t5)
#Call Robust SE
m4.vcovCL<-get_CL_vcov(m4, t5$adjustedmsc)
#Creating the Results
coeftest(m4, m4.vcovCL)  ## corresponding robust cluster SE
summary(m4)$r.squared  ## R-squared


#Preparing results for table
require(stargazer)
stargazer(m1,m2,m3,m4)
